{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from bs4 import BeautifulSoup\n",
    "localtext = open('10.html', 'r', encoding='utf8')\n",
    "sp = BeautifulSoup(localtext, 'lxml')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<!DOCTYPE html>\n",
       "<html lang=\"en\">\n",
       "<head>\n",
       "<meta charset=\"utf-8\"/>\n",
       "<meta content=\"width=device-width, initial-scale=1.0\" name=\"viewport\"/>\n",
       "<meta content=\"ie=edge\" http-equiv=\"X-UA-Compatible\"/>\n",
       "<title>text和string</title>\n",
       "</head>\n",
       "<body>\n",
       "<div id=\"divid\">\n",
       "<p>p1</p>\n",
       "<p>p2</p>\n",
       "<p>p3</p>\n",
       "<p id=\"cp\">\n",
       "            cp1\n",
       "            <span>s1</span>\n",
       "            cp2\n",
       "            <span>s2</span>\n",
       "            cp3\n",
       "        </p>\n",
       "<a href=\"a1\">a1</a>\n",
       "<a href=\"a2\">a2</a>\n",
       "<a href=\"a3\">a3</a>\n",
       "</div>\n",
       "</body>\n",
       "</html>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\n\\n\\n\\n\\ntext和string\\n\\n\\n\\np1\\np2\\np3\\n\\n            cp1\\n            s1\\n            cp2\\n            s2\\n            cp3\\n        \\na1\\na2\\na3\\n\\n\\n'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## 获取当前标签【sp】内部，子孙级全部的文本，并通过拼接，返回一个长字符串\n",
    "sp.text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 获取当前标签【sp】内部，子级标签\n",
    "## 只支持一个文本，否则没有内容\n",
    "sp.string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<p>p1</p>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sp.p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'p1'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sp.p.text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<a href=\"a1\">a1</a>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sp.a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a1\n",
      "a1\n"
     ]
    }
   ],
   "source": [
    "print(sp.a.text)\n",
    "print(sp.a.string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "tp = sp.find('p', {'id':'cp'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<p id=\"cp\">\n",
       "            cp1\n",
       "            <span>s1</span>\n",
       "            cp2\n",
       "            <span>s2</span>\n",
       "            cp3\n",
       "        </p>"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\n            cp1\\n            s1\\n            cp2\\n            s2\\n            cp3\\n        '"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tp.text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "tp.string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "NoneType"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(tp.string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
